from bs4 import BeautifulSoup
import requests
import time

def save_url(url):
    """Scrape one listing page, appending each item's link to ewood.txt.

    Returns the absolute URL of the next listing page, or None when the
    page has no "下一页" (next page) link.
    """
    # Some pages contain stray non-UTF-8 bytes; drop them rather than crash.
    ret = requests.get(url).content.decode('utf-8', 'ignore')
    # Name the parser explicitly so bs4 does not warn and guess one.
    soup = BeautifulSoup(ret, 'html.parser')
    find = soup.find_all('a', text='下一页')
    info = soup.find_all('ul', class_='ll')
    aes = info[0].find_all('a')

    with open('ewood.txt', 'a') as w:
        for item in aes:
            print(item['href'])
            w.write(item['href'])
            w.write("\n")

    # Bug fix: the original indexed find[0] unconditionally (IndexError on
    # the last page, which has no next-page link) and only assigned
    # next_page inside the item loop (NameError on an empty page).
    if find:
        return 'https://www.ewood.cn' + find[0]['href']
    return None

def save_url_all():
    """Walk the listing pages from the first one, saving links as we go.

    Sleeps one second between requests, plus a long pause after every
    tenth page to stay polite to the server.
    """
    page = save_url("https://www.ewood.cn/timber/buy?c=1")
    visited = 0
    while page:
        page = save_url(page)
        time.sleep(1)
        print(page)
        visited += 1
        if visited % 10 == 0:
            time.sleep(600)

def save_detail():
    """Fetch every URL listed in ewood.txt and append its parsed detail.

    Each output line in ewood_detail.txt has the form
    '<title>&&&<whitespace-collapsed body text>'.
    """
    # NOTE: the original had a stray triple-quoted string inside the
    # `with` block — a no-op expression, not a docstring; moved up here.
    with open('ewood.txt', 'r') as r:
        with open('ewood_detail.txt', 'a') as w:
            i = 0
            url = r.readline()
            while url:
                ret = requests.get('https://www.ewood.cn' + url.strip()).content.decode('utf-8', 'ignore')
                # Name the parser explicitly so bs4 does not warn and guess one.
                soup = BeautifulSoup(ret, 'html.parser')
                text = soup.find('div', class_='atcl')
                td = soup.find('div', class_='cnt')
                h1 = text.find('h1') if text is not None else None
                if h1 is None or td is None:
                    # Robustness fix: a page missing the expected markup used
                    # to raise AttributeError and abort the whole run; skip it
                    # instead so hours of progress are not lost.
                    print('skip (missing markup):', url.strip())
                else:
                    # Collapse every run of whitespace to a single space.
                    resa = ' '.join(td.get_text().split())
                    res = h1.get_text() + '&&&' + resa
                    print(res)
                    w.write(res)
                    w.write("\n")
                url = r.readline()
                i += 1
                print(i)
                time.sleep(1)




# Two-step pipeline: run save_url_all() first to collect item links into
# ewood.txt, then save_detail() to fetch each link's detail page.
if __name__ == '__main__':
    # save_url_all()
    save_detail()